In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data=pd.read_csv("C:\\Users\\AASHIMA\\Desktop\\Python\\irisflowe.csv")
In [2]:
data.head()
Out[2]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
In [3]:
data.describe()
Out[3]:
sepal_length sepal_width petal_length petal_width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667
std 0.828066 0.433594 1.764420 0.763161
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
In [4]:
data.species.unique()
Out[4]:
array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)
In [5]:
data["Target"]=data.species.map({"Iris-setosa":0,"Iris-versicolor":1,"Iris-virginica":2})
df0=data[0:50]
df1=data[50:100]
df2=data[100:]
In [6]:
plt.xlabel("Sepal Lenght")
plt.ylabel("Sepal Width")
plt.scatter(df0["sepal_length"],df0['sepal_width'],color='green', marker='+')
plt.scatter(df1["sepal_length"],df1['sepal_width'],color='blue', marker='.')
Out[6]:
<matplotlib.collections.PathCollection at 0x1df6412b9d0>
In [7]:
plt.xlabel("Petal Lenght")
plt.ylabel("Petal  Width")
plt.scatter(df0["petal_length"],df0['petal_width'],color='green', marker='+')
plt.scatter(df1["petal_length"],df1['petal_width'],color='blue', marker='.')
Out[7]:
<matplotlib.collections.PathCollection at 0x1df641a1350>
In [8]:
import plotly.express as px
fig=px.scatter(data,x="sepal_width",y="sepal_length",color="species")
fig.show()
In [9]:
fig=px.scatter(data,x="petal_width",y="petal_length",color="species")
fig.show()

It can be concluded that Iris-Setsos', sepal_width and lenght are small as compared to other two. Iris-Virginica's both sepal lenght and width are largest among all the spieces¶

In [10]:
x = data.drop("species", axis=1)
y = data["species"]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, 
                                                    test_size=0.2, 
                                                    random_state=0)

from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train, y_train)
print(knn.score(x_test,y_test))
1.0
In [11]:
len(x_train)
Out[11]:
120
In [12]:
len(x_test)
Out[12]:
30
In [14]:
x_new = np.array([[20.0, 15.3, 11.5, 1.2,2.0]])
prediction = knn.predict(x_new)
print("Prediction: {}".format(prediction))
Prediction: ['Iris-virginica']
C:\Users\AASHIMA\anaconda3\Lib\site-packages\sklearn\base.py:439: UserWarning:

X does not have valid feature names, but KNeighborsClassifier was fitted with feature names

In [16]:
x_new = np.array([[5, 2.9, 1, 0.2,0.5]])
prediction = knn.predict(x_new)
print("Prediction: {}".format(prediction))
Prediction: ['Iris-setosa']
C:\Users\AASHIMA\anaconda3\Lib\site-packages\sklearn\base.py:439: UserWarning:

X does not have valid feature names, but KNeighborsClassifier was fitted with feature names

In [17]:
from sklearn.metrics import confusion_matrix
y_pred=knn.predict(x_test)
cm=confusion_matrix(y_test,y_pred)
cm
Out[17]:
array([[11,  0,  0],
       [ 0, 13,  0],
       [ 0,  0,  6]], dtype=int64)
In [18]:
import seaborn as sns
plt.figure(figsize=(7,5))
sns.heatmap(cm,annot=True)
plt.xlabel("Prerdicted")
plt.ylabel("Truth")
#Diagonal values are correct prediicted values whereas other values shows that they are incorrect values
Out[18]:
Text(58.222222222222214, 0.5, 'Truth')
In [19]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        11
Iris-versicolor       1.00      1.00      1.00        13
 Iris-virginica       1.00      1.00      1.00         6

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

In [ ]: